Initial Set Up Steps

Set up contains all the libraries needed to run the code, some important global variables, and preliminary calculations needed in our analysis.

# mcc codes
read_csv("https://raw.githubusercontent.com/greggles/mcc-codes/main/mcc_codes.csv") %>%
  dplyr::select(
    MCC = mcc,
    label = edited_description
  ) %>%
  saveRDS("rds/mcc_codes.rds")

# spending mcc only up to July (month 7)
spending_MCC %>%
  group_by(year(date),month(date),MCC,label) %>%
  summarize(
    sum_trans = sum(as.numeric(transaction_counts), na.rm=T),
    avg_doll_per_trans=mean(as.numeric(total_spent) / as.numeric(transaction_counts), na.rm=T)) %>% 
  saveRDS("baymap/rds/spending_MCC_year.rds")

spending_MCC %>%
  group_by(year(date),wday(date),MCC,label) %>%
  summarize(avg_doll_per_trans=mean(as.numeric(total_spent) / as.numeric(transaction_counts), na.rm=T)) %>% 
  saveRDS("baymap/rds/spending_MCC_day.rds")

# spending mcc by zip for 2020
spending_MCC %>% 
  filter(year(date) == 2020) %>% 
  group_by(month(date),zip,MCC,label) %>%
  summarize(
    avg_doll_per_trans=mean(as.numeric(total_spent) / as.numeric(transaction_counts), na.rm=T)) %>% 
  left_join(bay_zip %>% select(ZIP_CODE,PO_NAME,COUNTY),by=c("zip"="ZIP_CODE")) %>% 
  saveRDS("baymap/rds/spending_MCC_zip.rds")
# amazon + walmart reach - all of CA
az <- 
  read.csv("references/amazonfresh_zipcodes.csv",colClasses=c("character","character")) %>% 
  # filter(Zip %in% bay_zip$ZIP_CODE) %>%
  # left_join(bay_zip, by=c("Zip"="ZIP_CODE")) %>%
  left_join(ca_zip, by=c("Zip"="ZIP_CODE")) %>%
  st_as_sf(crs=4326) %>% 
  mutate(area = st_area(.)) %>% 
  summarise(area=sum(area))

w <- 
  read.csv("references/walmart_delivery_stores_ca.csv") %>% 
  # filter(Zip.Code %in% bay_zip$ZIP_CODE) %>% 
  st_as_sf(coords=c("Longitude", "Latitude"), crs=4326)

wz <- 
  w %>% 
  st_transform(crs = CA_ALBERS) %>% 
  st_buffer(dist = 9 * 1609.344) %>% 
  st_transform(crs=4326) %>% 
  # st_intersection(bay_zip) %>%
  st_intersection(ca_zip) %>%
  mutate(area = st_area(.)) %>% 
  summarise(area=sum(area))

list("amz_zip"=az,"wmt"=w,"wmt_zip"=wz) %>% saveRDS('baymap/rds/amz_wmt_zip.rds')

az %>% 
  st_union(wz) %>% 
  saveRDS('rds/amz_wmt.rds')
# affected population
az_wz <- readRDS('rds/amz_wmt.rds')
az_wz_zip <- readRDS('baymap/rds/amz_wmt_zip.rds')

snap_county <- 
  read_excel("references/CalFresh Data/CFDashboardData.xlsx",skip=1) %>% 
  filter(Date == as.Date("2020-05-01")) %>%
  select(County,`CalFresh Persons`) %>% 
  left_join(ca_counties, by=c("County"="NAME")) %>%
  st_as_sf(crs=4326)

mapped <- function(x, data){
  counties_union <-  
    ca_counties %>% 
    filter(NAME == x) %>% 
    st_cast("MULTIPOLYGON") %>% 
    st_intersection(st_make_valid(data))
  if (nrow(counties_union)==0){
    return(NA)
  }
  result <- 
    snap_county %>% 
      filter(County == x) %>%
      st_cast("MULTIPOLYGON") %>% 
      .[,"CalFresh Persons"] %>% 
      st_interpolate_aw(counties_union , extensive=T) %>% 
      pull("CalFresh.Persons") %>% 
      round(digits=0)
  return(result)
}

snap_county %>% 
  mutate(
    # `Reached by Walmart` =
    #   snap_county$County %>%
    #   map(mapped,data=az_wz_zip$wmt_zip) %>%
    #   unlist(),
    # `Reached by Amazon` =
    #   snap_county$County %>%
    #   map(mapped,data=az_wz_zip$amz_zip) %>%
    #   unlist(),
    `Total Reached` = 
      snap_county$County %>%  
      map(mapped,data=az_wz) %>% 
      unlist(),
    `% Reached` = round(100 * `Total Reached` / `CalFresh Persons`,1)
  ) %>% 
  as.data.frame() %>% 
  select(County, `CalFresh Persons`, `Total Reached`, `% Reached`) %>% 
  replace_na(list(`Total Reached`=0, `% Reached`=0)) %>% 
  # select(County, `CalFresh Persons`, `Reached by Amazon`, `Reached by Walmart`,`Total Reached`, `% Reached`) %>% 
  # replace_na(list(`Reached by Amazon`=0, `Reached by Walmart`=0,`Total Reached`=0, `% Reached`=0)) %>% 
  saveRDS('baymap/rds/amz_wmt_reach.rds')
get_trend <- function(data,name){
  AMT <- 
    data[data$TOTAL_AMT > 0,] %>% 
    group_by(month(DATE), TRANSACTION_MODE) %>% 
    summarize(
      AVG_AMT=mean(as.numeric(TOTAL_AMT), na.rm=T),
      AVG_TRX_CNT=mean(as.numeric(TOTAL_TRX_CNT), na.rm=T),
      AVG_AMT_PER_TRX = mean(as.numeric(TOTAL_AMT) / as.numeric(TOTAL_TRX_CNT), na.rm=T),
      TOTAL_TRX_CNT=sum(as.numeric(TOTAL_TRX_CNT), na.rm=T)
    )
  GROCERY_AMT <- 
    data[data$TOTAL_GROCERY_SUPERMARKET_AMT > 0,] %>% 
    group_by(month(DATE), TRANSACTION_MODE) %>% 
    summarize(
      AVG_GROCERY_AMT=mean(as.numeric(TOTAL_GROCERY_SUPERMARKET_AMT), na.rm=T),
      TOTAL_GROCERY_TRX_CNT=sum(as.numeric(TOTAL_GROCERY_SUPERMARKET_TRX_CNT), na.rm=T),
      AVG_GROCERY_AMT_PER_TRX = mean(as.numeric(TOTAL_GROCERY_SUPERMARKET_AMT) / as.numeric(TOTAL_GROCERY_SUPERMARKET_TRX_CNT), na.rm=T)
    )
  
  AMT %>% 
    left_join(GROCERY_AMT, by=c("month(DATE)","TRANSACTION_MODE")) %>% 
    mutate(
      store=name
    )
}

wmt_trend <- get_trend(wmt,"WMT")
amz_trend <- get_trend(amz,"AMZ")

wmt_trend %>% 
  rbind(amz_trend) %>% 
  replace_na(list(AVG_GROCERY_AMT=0,TOTAL_GROCERY_TRX_CNT=0,AVG_GROCERY_AMT_PER_TRX=0)) %>% 
  saveRDS("baymap/rds/amz_wmt_trend.rds")

wmt %>% 
  filter(year(as.Date(DATE)) == 2020) %>% 
  filter(TRANSACTION_MODE == "Online") %>% 
  mutate(store="WMT") %>% 
  group_by(CRD_HLDR_ZIP5) %>%
  summarize(
    TOTAL_TRX_CNT=sum(as.numeric(TOTAL_TRX_CNT), na.rm=T)) %>% 
  left_join(ca_zip,by=c("CRD_HLDR_ZIP5"="ZIP_CODE")) %>% 
  select(CRD_HLDR_ZIP5,PO_NAME,TOTAL_TRX_CNT,geometry) %>%
  st_as_sf(crs=4326) %>% 
  saveRDS("baymap/rds/wmt_online_zip.rds")

Amazon + Walmart Reach

Pink areas are Amazon Fresh delivery coverage by zip code provided by CAFB. Blue areas represent the 9 mile delivery radius from Walmart Grocery Pick-up locations. Overlapping these two geometries together, we can find the approximate SNAP population reached in each county through area-weighted interpolation. Some key insights are highlighted below:

We’d also like to note that this method works better with higher levels of granularity (e.g. census tracts or census block groups) or urban dense communities. Right now, we only have access to May 2020 SNAP participation on a county-level.

amz_wmt_zip <- readRDS('baymap/rds/amz_wmt_zip.rds')

leaflet() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    data = amz_wmt_zip$amz_zip ,
    color = "#e851a2",
    fillOpacity = 0.5,
    weight=1,
    group="Amazon"
  ) %>% 
  addPolygons(
    data = amz_wmt_zip$wmt_zip,
    color = "#56bedf",
    fillOpacity = 0.5,
    weight=1,
    group="Walmart"
  ) %>%
  addCircleMarkers(
    data = amz_wmt_zip$wmt,
    color = "black",
    radius = 2,
    label = ~Address,
    group = "Walmart"
  ) %>% 
  addPolygons(
    data = ca_counties,
    fillOpacity = 0,
    color = "black",
    weight = 1.5,
    dashArray = 3,
    label = as.character(ca_counties$NAME),
    labelOptions = labelOptions(style=list('font-weight'='bold','text-transform'='uppercase')),
    group = "County Lines"
  ) %>% 
  addLayersControl(
    overlayGroups = c("Amazon","Walmart","County Lines")
  )
# table of reach
amz_wmt_reach <- readRDS('baymap/rds/amz_wmt_reach.rds')
# amz_wmt_reach[1,"Reached by Amazon"] <- sum(amz_wmt_reach$`Reached by Amazon`, na.rm=T)
# amz_wmt_reach[1,"Reached by Walmart"] <- sum(amz_wmt_reach$`Reached by Walmart`, na.rm=T)
amz_wmt_reach[1,"Total Reached"] <- sum(amz_wmt_reach$`Total Reached`, na.rm=T)
amz_wmt_reach[1,"% Reached"] <- round(100 * amz_wmt_reach[1,"Total Reached"] / amz_wmt_reach[1,"CalFresh Persons"],1)
knitr::kable(amz_wmt_reach)
County CalFresh Persons Total Reached % Reached
Statewide 4727883 2029295 42.9
Alameda 134219 104212 77.6
Alpine 148 0 0.0
Amador 2932 0 0.0
Butte 32491 0 0.0
Calaveras 5590 0 0.0
Colusa 1934 0 0.0
Contra Costa 73309 64830 88.4
Del Norte 5902 0 0.0
El Dorado 13305 576 4.3
Fresno 286792 29902 10.4
Glenn 3634 0 0.0
Humboldt 24681 0 0.0
Imperial 41901 4028 9.6
Inyo 2196 0 0.0
Kern 172901 10483 6.1
Kings 26337 4692 17.8
Lake 13771 0 0.0
Lassen 3455 0 0.0
Los Angeles 1383112 859488 62.1
Madera 29751 1153 3.9
Marin 13345 111 0.8
Mariposa 2658 0 0.0
Mendocino 13321 0 0.0
Merced 56379 14497 25.7
Modoc 1491 0 0.0
Mono 1010 0 0.0
Monterey 46322 2 0.0
Napa 7542 1114 14.8
Nevada 9114 0 0.0
Orange 242909 242617 99.9
Placer 16886 2912 17.2
Plumas 2401 0 0.0
Riverside 282116 79354 28.1
Sacramento 227892 170036 74.6
San Benito 5444 164 3.0
San Bernardino 357205 30152 8.4
San Diego 331073 110303 33.3
San Francisco 79157 78466 99.1
San Joaquin 105222 33146 31.5
San Luis Obispo 19086 0 0.0
San Mateo 27782 14201 51.1
Santa Barbara 46093 4533 9.8
Santa Clara 96962 64650 66.7
Santa Cruz 28816 990 3.4
Shasta 26412 1746 6.6
Sierra 296 0 0.0
Siskiyou 7964 0 0.0
Solano 44165 32172 72.8
Sonoma 31184 342 1.1
Stanislaus 80723 24004 29.7
Sutter 12540 4080 32.5
Tehama 10541 0 0.0
Trinity 2068 0 0.0
Tulare 118467 6505 5.5
Tuolumne 5564 0 0.0
Ventura 72388 24831 34.3
Yolo 21870 5207 23.8
Yuba 15114 3796 25.1

Facteus vs. SNAP